library(readr)
library(haven)
library(dplyr)
library(skimr)
library(gtsummary)
############################### 数据筛选 #######################################
#获取2017至2018年所有研究对象
setwd('D:/rCode/论文实战/nhanes')
setwd('G:/BaiduNetdiskDownload/NHANES')

demo <- read_xpt("2017-2018/Demographics/demo_j.xpt")
Examination <- read_xpt("2017-2018/Examination/lux_j.xpt")
Laboratory <- read_xpt("2017-2018/Laboratory/hepbd_j.xpt")
Laboratory1 <- read_xpt("2017-2018/Laboratory/hepc_j.xpt")
Questionnaire <- read_xpt("2017-2018/Questionnaire/osq_j.xpt")
ALQ <- read_xpt("2017-2018/Questionnaire/alq_j.xpt")
AST <- read_xpt("2017-2018/Laboratory/biopro_j.xpt")
bmi <- read_xpt("2017-2018/Examination/bmx_j.xpt")
diq <- read_xpt("2017-2018/Questionnaire/diq_j.xpt")
bpq <- read_xpt("2017-2018/Questionnaire/bpq_j.xpt")
Laboratory2 <- read_xpt("2017-2018/Laboratory/trigly_j.xpt")
smq <- read_xpt("2017-2018/Questionnaire/smq_j.xpt")
Laboratory3 <- read_xpt("2017-2018/Laboratory/tchol_j.xpt")
Laboratory4 <- read_xpt("2017-2018/Laboratory/hdl_j.xpt")
Laboratory5 <- read_xpt("2017-2018/Laboratory/glu_j.xpt")
mcq <- read_xpt("2017-2018/Questionnaire/mcq_j.xpt")

output <- plyr::join_all(list(demo, Examination,Laboratory,Laboratory1,Questionnaire,ALQ,AST,bmi
                              ,bpq,diq,Laboratory2,smq,Laboratory3,Laboratory4,Laboratory5,mcq),
                         by='SEQN',type='full')
#合并数据并同时筛选掉弹性成像测量条件的数据6401
output <- merge(demo,Examination,by="SEQN")
output <- merge(output,Laboratory,by="SEQN")
output <- merge(output,Laboratory1,by="SEQN")
output <- merge(output,Questionnaire,by="SEQN",all.x=T)
output <- merge(output,ALQ,by="SEQN",all.x=T)
output <- merge(output,AST,by="SEQN",all.x=T)
output <- merge(output,bmi,by="SEQN",all.x=T)
output <- merge(output,bpq,by="SEQN",all.x=T)
output <- merge(output,diq,by="SEQN",all.x=T)
output <- merge(output,Laboratory2,by="SEQN",all.x=T)
output <- merge(output,smq,by="SEQN",all.x=T)
output <- merge(output,Laboratory3,by="SEQN",all.x=T)
output <- merge(output,Laboratory4,by="SEQN",all.x=T)
output <- merge(output,Laboratory5,by="SEQN",all.x=T)
output <- merge(output,mcq,by="SEQN",all.x=T)
dim(output)
output$RIDRETH3
skim(output$LBDHBG)
colnames(output)
outputtemp <- output[,c('SEQN','LBDHBG','RIDRETH3')]

outputtempna1<-outputtemp[-outputtemp$LBDHBG==1,]
outputtemp$LBDHBG <- ifelse(is.na(outputtemp$LBDHBG),0,2)
outputtempLBDHBG <- outputtempfix[outputtempfix$LBDHBG!=1,]


skim(outputtemp)

RIDRETH3na <- which(!is.na(output$RIDRETH3))
LUAXSTATna <- which((output$LUAXSTAT))
LBDHBGna <- which(is.na(output$LBDHBG))
RIDRETH3na <- which(!is.na(output$RIDRETH3))

table(output$RIDRETH3)
View(output)
#弹性成像检查筛选5494
output2 <- output[output$LUAXSTAT==1,]
a <- output[output$LUAXSTAT==1,]
View(a)
#年龄筛选4746
output <- output[output$RIDAGEYR>=18,]
#乙肝筛选4719(缺1)
output <- output[output$LBDHBG!=1,]
#丙肝筛选(两个都对不上,后面再看)
#a <- output[output$LBDHCI!=1,]#4704
#a <- output[output$LBXHCR!=1,]#4698
output <- output[output$LBXHCR!=1,]#4698
#饮酒数据筛选(数据对不上)
output <- output[output$ALQ130<=1,]#4613
#药物筛选(没找到相应的变量数据无法匹配)
#a <- output[output$OSQ130!=1,]#4705
#a <- output[output$OSQ140Q<3,]#4711
#a <- output[output$OSQ140U==1,]#4719
output <- output[output$OSQ130!=1,]#4705
dim(output)
output$Sex <- ifelse(output$RIAGENDR==1,'male','female')
output$Hypertriglyceridemia <- ifelse(output$LBDTRSI>1.7,'1','0')
output$CentralObesity <- ifelse(output$Sex=='male' & output$BMXWAIST>90,'1',
                                ifelse((output$Sex=='female' & output$BMXWAIST>85.0),'1','2'))
output <- mutate(output, SmokingStatus = case_when(
  SMQ020 == 2 ~ 'Never smoker',
  SMQ020 == 1 & SMQ040 == 3 ~ 'Former smoker',
  SMQ020 == 1 & SMQ040 <= 2 ~ 'Current smoker'
))
output$MetS <- 0
a <- 1
for (a in 1:dim(output)) {
  dd <- output[a,]
  ce <- dd[c('Sex','BMXWAIST','RIDRETH3')]
  num <- 0
  c <- which(is.na(ce)==TRUE)
  if(length(c)==0){
    print(1)
    if(dd$Sex=='male' & dd$BMXWAIST>=102 & dd$RIDRETH3!=6){
      num <- num+1
    }
    if(dd$Sex=='female' & dd$BMXWAIST>=88 & dd$RIDRETH3!=6){
      print(2)
      num <- num+1
    }
    if(dd$Sex=='male' & dd$BMXWAIST>=90 & dd$RIDRETH3==6){
      num <- num+1
    }
    if(dd$Sex=='female' & dd$BMXWAIST>=80 & dd$RIDRETH3==6){
      num <- num+1
    }
  }
  ce <- dd[c('LBXSTR')]
  c <- which(is.na(ce)==TRUE)
  if(length(c)==0){
    if(dd$LBXSTR>150){
      num <- num+1
    }
  }
  ce <- dd[c('Sex','LBDHDD')]
  c <- which(is.na(ce)==TRUE)   
  if(length(c)==0){
    if(dd$Sex=='male' & dd$LBDHDD<40){
      num <- num+1
    }
    if(dd$Sex=='female' & dd$LBDHDD<50){
      num <- num+1
    }
  }
  ce <- dd[c('BPQ020')]
  c <- which(is.na(ce)==TRUE)   
  if(length(c)==0){
    if(dd$BPQ020==1){
      num <- num+1
    }
  }
  ce <- dd[c('LBXGLU','DIQ010')]
  c <- which(is.na(ce)==TRUE)   
  if(length(c)==0){
    if(dd$LBXGLU>=110 | dd$DIQ010==1){
      num <- num+1
    }
  }
  
  print(paste0('a=',a))
  print(paste0('num=',num))
  if(num>=3){
    output[a,]$MetS <- 1
  }
}
output$LUXSMED#lsm
output$LUXCAPM#CAP
output$LBXSASSI#AST
output$NAFLD <- 0
for (a in 1:dim(output)) {
  dd <- output[a,]
  ce <- dd[c('LUXCAPM','LUXSMED')]
  c <- which(is.na(ce)==TRUE)   
  if(length(c)==0){
    if(dd$LUXCAPM>=285 | dd$LUXSMED>=8.6){
      output[a,]$NAFLD <- 1
    }
  }
}
MCQ510b
View(data)
output$RIDRETH1
#                    年龄     性别   BMI      腰围      MetS    高血压  高血糖      高血脂                  腹部肥胖
data <- output[,c('RIDAGEYR','Sex','BMXBMI','BMXWAIST','MetS','BPQ020','DIQ010','Hypertriglyceridemia','CentralObesity',
#                     吸烟状态    总胆固醇 甘油三酯 HDL-C   空腹血糖    ALT       AST          ALP      GGT
                  'SmokingStatus','LBXTC','LBXSTR','LBDHDD','LBXGLU','LBXSATSI','LBXSASSI','LBXSAPSI','LBXSGTSI',
#                  白蛋白   总胆红素   CAP      LSM  非酒精性肝病 种族
                  'LBXSAL','LBXSTB','LUXCAPM','LUXSMED','NAFLD','RIDRETH3')]
View(data)
lbxgh
output$lbxg
write.csv(data, "paper4.csv")
getwd()
str(data)
#output$LUXSMED#lsm
#output$LUXCAPM#CAP
#output$LBXSASSI#AST
output$RIDRETH3


################################## 基线图 ######################################
library(arsenal)
getwd()
setwd("D:/rCode/论文实战/paper4")
demo <- read_csv("paper4.csv")
View(demo)
demo$FAST <- ifelse(demo$FASTscore<35,'FAST score <0.35','FAST score >=0.35')
demo$RIDRETH3
colnames(demo) <- c('id','Age','Sex','BMI','WaistCircumference','Mets','HighBloodPressure',
                    'Hyperglycemis','Hypertriglyceridemia','CentralObesity','SmokingStatus',
                    'TotalCholesterol','Triglycerides','HDLC','FastingGlucose','ALT',
                    'AST','ALP','GGT','Albumin','TotalBilirubin','CAP','LSM','NAFLD',
                    'FASTscore','race','FAST')
fml <- as.formula(paste0('FAST~',paste0(colnames(demo[,c(2:23)]),collapse = '+')))
tab1 <- tableby(fml ,data = demo,
                control=tableby.control(total=TRUE,#是否展示总数
                                        cat.simplify=FALSE,#是否只展示1的个数及百分比(不能有空值否则无效)
                                        digits=2))
summary(tab1, text=TRUE)
################################### 图1A #######################################
library(ggplot2)
install.packages("ggprism")
library(ggprism)
x <- demo[,2:5]
demo %>% 
  ggplot(aes(x = Sex+HighBloodPressure, y = Age,fill = factor(FAST))) + 
  stat_summary(geom = "col", fun = mean,aes(fill = factor(FAST))) + 
  stat_summary(geom = "errorbar", 
               fun = mean,
               fun.min = function(x) mean(x) - sd(x),
               fun.max = function(x) mean(x) + sd(x),
               width = 0.3)
  
colnames(demo)
install.packages("ggthemr")
library(ggthemr)
library(ggsignif)







########################## 表2 血液参数与NAFLD关系 #############################
colnames(demo) <- c('id','Age','Sex','BMI','WaistCircumference','Mets','HighBloodPressure',
                    'Hyperglycemis','Hypertriglyceridemia','CentralObesity','SmokingStatus',
                    'TotalCholesterol','Triglycerides','HDLC','FastingGlucose','ALT',
                    'AST','ALP','GGT','Albumin','TotalBilirubin','CAP','LSM','NAFLD',
                    'FASTscore','FAST')
demo$FASTint <- ifelse(demo$FAST=='FAST score <0.35',0,1)
demo$FASTint <- as.factor(demo$FASTint)
dev <- demo[demo$FASTint==0,]
vad <- demo[demo$FASTint==1,]
fml <- as.formula(paste0('NAFLD==0~',paste0(colnames(demo[,c(13,14,16:21)]),collapse = '+')))
demo$NAFLD
str(demo)
modelA<-glm(fml,data = dev,family=binomial)
model1 <-tbl_regression(modelA,exponentiate = T)
fml <- as.formula(paste0('NAFLD==1~',paste0(colnames(demo[,c(13,14,16:21)]),collapse = '+')))

modelB<-glm(fml,data = vad,family=binomial)
model2 <-tbl_regression(modelB,exponentiate = T)
table2 <- tbl_merge(list(model1,model2))
#table2%>%as_flex_table()%>%flextable::save_as_html(path = 'sP.html')








 



